{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Predict the house value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "is_executing": true
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Load the housing table from the remote CSV.\n",
    "df_housing = pd.read_csv(\n",
    "    \"https://raw.githubusercontent.com/huangjia2019/house/master/house.csv\")\n",
    "\n",
    "# Features X: every column except the target median_house_value.\n",
    "X = df_housing.drop(\"median_house_value\", axis=1)\n",
    "Y = df_housing.median_house_value  # Label Y: just median_house_value\n",
    "\n",
    "# Keep 80% of the rows for training and hold out 20% for testing;\n",
    "# random_state pins the split so reruns give the same partition.\n",
    "_X_train, _X_test, _Y_train, _Y_test = train_test_split(\n",
    "    X, Y, test_size=0.2, random_state=0)\n",
    "\n",
    "# Fit a linear regression model on the training split.\n",
    "model = LinearRegression()\n",
    "model.fit(_X_train, _Y_train)\n",
    "\n",
    "# Predict the held-out rows and compare with the true values.\n",
    "Y_pred = model.predict(_X_test)\n",
    "print(\"--True house value (testing set):\\t\",  np.array(_Y_test))\n",
    "print(\"--Predict house value (testing set):\\t\", np.array(Y_pred))\n",
    "\n",
    "# LinearRegression.score reports the coefficient of determination (R^2).\n",
    "score = model.score(_X_test, _Y_test)\n",
    "print(\"--The comprehensive score: \\t\", score)\n",
    "if score > 0.6:\n",
    "    print(\"Congratulations! You passed!\")\n",
    "\n",
    "# Data view: true vs. predicted house value against median income.\n",
    "# The model uses every feature, so its predictions are not a single line in\n",
    "# this 2-D projection; they are drawn as points instead of being connected\n",
    "# in row order, which would produce a meaningless zigzag.\n",
    "plt.scatter(_X_test.median_income, _Y_test, color='yellow',\n",
    "            label=\"True value\")\n",
    "plt.scatter(_X_test.median_income, Y_pred, color='green', s=4,\n",
    "            label=\"Predicted value\")\n",
    "plt.xlabel(\"Median Income\")\n",
    "plt.ylabel(\"Median Housing Value\")  # was a second xlabel call; y axis had no label\n",
    "plt.legend()\n",
    "plt.show()\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1. Load data set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "from keras.datasets import mnist\n",
    "\n",
    "# load_data() is unpacked as a (train, test) pair of (images, labels) tuples.\n",
    "train_split, test_split = mnist.load_data()\n",
    "train_X_img, train_Y_lable = train_split\n",
    "test_X_img, test_Y_lable = test_split\n",
    "\n",
    "# Shape of the whole training image array, then of a single image.\n",
    "print(train_X_img.shape)\n",
    "print(train_X_img[0].shape)\n",
    "# Label that belongs to the first training image.\n",
    "print(train_Y_lable[0])"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2. Preprocess the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.utils import to_categorical  # NOQA\n",
    "# Add a trailing channel axis so each image is 28x28x1, the input shape the\n",
    "# Conv2D layer expects. -1 lets NumPy infer the number of samples instead of\n",
    "# hard-coding 60000 / 10000, so a subset of the data also works.\n",
    "# Pixel values are scaled from the 0-255 range to [0, 1] floats, which keeps\n",
    "# the network inputs small and makes gradient-based training better behaved.\n",
    "X_train = train_X_img.reshape(-1, 28, 28, 1).astype(\"float32\") / 255.0\n",
    "X_test = test_X_img.reshape(-1, 28, 28, 1).astype(\"float32\") / 255.0\n",
    "\n",
    "# One-hot encode the digit labels into 10 classes.\n",
    "y_train = to_categorical(train_Y_lable, 10)\n",
    "y_test = to_categorical(test_Y_lable, 10)\n",
    "\n",
    "print(\"The shape of train tensor: \", X_train.shape)\n",
    "print(\"First label of train\", y_train[0])"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "3. Choose the machine-learning model  \n",
    "\n",
    "> There are many algorithms; the main families are listed below:\n",
    "   - linear (linear regression, logistic regression)\n",
    "   - non-linear (支持向量机 support vector machine, k最邻近分类 k-nearest neighbors, etc.)\n",
    "   - tree (决策树 decision tree, 随机森林 random forest, 梯度提升树 gradient boosting tree, etc.)\n",
    "   - neural network (artificial 人工, convolutional 卷积, long short-term memory 长短期记忆... neural networks)\n",
    "\n",
    "> A convolutional neural network is probably the better choice for image recognition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras import models\n",
    "from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout\n",
    "\n",
    "\"Use 序贯方式 to build the model, state machine?\"\n",
    "model = models.Sequential()\n",
    "\n",
    "\"add layers\"\n",
    "# The stack is declared once, top to bottom, and then added in that order.\n",
    "cnn_layers = [\n",
    "    # Convolution stage 1: 32 filters with a 3x3 kernel; the first layer also\n",
    "    # declares the 28x28x1 input shape.\n",
    "    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),\n",
    "    MaxPooling2D(pool_size=(2, 2)),  # 2x2 max pooling\n",
    "    # Convolution stage 2: 64 filters with a 3x3 kernel.\n",
    "    Conv2D(64, (3, 3), activation='relu'),\n",
    "    MaxPooling2D(pool_size=(2, 2)),\n",
    "    Dropout(0.25),  # dropout to reduce overfitting\n",
    "    Flatten(),\n",
    "    Dense(128, activation=\"relu\"),  # fully connected layer\n",
    "    Dropout(0.5),  # dropout to reduce overfitting\n",
    "    # Softmax output over the 10 digit classes.\n",
    "    Dense(10, activation=\"softmax\"),\n",
    "]\n",
    "for layer in cnn_layers:\n",
    "    model.add(layer)\n",
    "\n",
    "\"compile\"\n",
    "# Accuracy is the metric reported during training and evaluation.\n",
    "model.compile(\n",
    "    optimizer=\"rmsprop\",\n",
    "    loss=\"categorical_crossentropy\",\n",
    "    metrics=[\"accuracy\"],\n",
    ")\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "4. Start training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"will display ever round's accuracy(训练数据), val_accuracy(验证数据)\"\n",
    "# Training options collected in one place so they are easy to tune.\n",
    "fit_options = dict(\n",
    "    validation_split=0.3,  # 30% of the training data is set aside for validation\n",
    "    epochs=5,  # five passes over the training data\n",
    "    batch_size=128,\n",
    ")\n",
    "model.fit(X_train, y_train, **fit_options)\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "5. Apply to test dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\"Use the trained model to test dataset\"\n",
    "# evaluate() result is printed whole, then its second entry is reported as\n",
    "# the accuracy (the only metric requested when the model was compiled).\n",
    "score = model.evaluate(X_test, y_test)\n",
    "print(score)\n",
    "test_accuracy = score[1]\n",
    "print(\"The accuracy of predict in test dataset: \", test_accuracy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\"Additional testting\"\n",
    "\n",
    "# Run the model on the first test image only; predict() needs a batch axis,\n",
    "# hence the reshape to (1, 28, 28, 1).\n",
    "first_test_image = X_test[0]\n",
    "pred = model.predict(first_test_image.reshape(1, 28, 28, 1))\n",
    "# argmax turns the 10 class scores into the predicted digit.\n",
    "print(pred[0], \"\\n conveting foramt.... \\n\", pred.argmax())\n",
    "# Show the same image as a 28x28 greyscale picture.\n",
    "plt.imshow(first_test_image.reshape(28, 28), cmap=\"Greys\")\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "### Boston housing value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn.linear_model import LinearRegression\n",
    "import matplotlib.pyplot as plt\n",
    "from keras.datasets import boston_housing\n",
    "import pandas as pd\n",
    "\n",
    "# NOTE: this rebinds y_train / y_test (the MNIST one-hot labels) and, below,\n",
    "# model (the CNN), so the MNIST cells must be rerun before they are used again.\n",
    "(x_train, y_train), (x_test, y_test) = boston_housing.load_data()\n",
    "# DataFrame copies of the training split, convenient for inspecting the data.\n",
    "x_train_table = pd.DataFrame(x_train)\n",
    "y_train_table = pd.DataFrame(y_train)\n",
    "\n",
    "# Fit a linear regression model on the training split.\n",
    "model = LinearRegression()\n",
    "model.fit(x_train, y_train)\n",
    "\n",
    "# Predict the test split and show the first five values next to the truth.\n",
    "Y_pred = model.predict(x_test)\n",
    "print(\"--True house value (testing set):\\t\",  np.array(y_test)[0:5])\n",
    "print(\"--Predict house value (testing set):\\t\", np.array(Y_pred)[0:5])\n",
    "\n",
    "# LinearRegression.score reports the coefficient of determination (R^2).\n",
    "score = model.score(x_test, y_test)\n",
    "print(\"--The comprehensive score: \\t\", score)\n",
    "if score > 0.6:\n",
    "    print(\"===================================\")\n",
    "    print(\"Congratulations! You passed!\")\n",
    "    print(\"===================================\")\n",
    "\n",
    "# Data view: one figure per feature column, true vs. predicted value.\n",
    "# The model uses every feature, so its predictions are not a single line\n",
    "# against any one column; they are drawn as points rather than connected in\n",
    "# row order, which would produce a meaningless zigzag.\n",
    "label = y_test\n",
    "for i in range(x_test.shape[1]):\n",
    "    feature = x_test[:, i]\n",
    "    plt.scatter(feature, label, color='yellow', label=\"True value\")\n",
    "    plt.scatter(feature, Y_pred, color='green', s=8, label=\"Predicted value\")\n",
    "    plt.xlabel(f\"The {i} col feature\")\n",
    "    plt.ylabel(\"Median Housing Value\")\n",
    "    plt.legend()\n",
    "    plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"Testing for section 2: python basic, numpy, probability\"\n",
    "# (x, y) = [\n",
    "#     (-5, 1),\n",
    "#     (-5, 1),\n",
    "#     (-5, 1),\n",
    "#     (-5, 1)\n",
    "# ]\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "def f(x):\n",
    "    \"\"\"Return 2*x + 1 for a scalar or an array x.\"\"\"\n",
    "    return 2*x + 1\n",
    "\n",
    "\n",
    "# Sample points from -2 up to (not including) 2 in steps of 0.1.\n",
    "x = np.arange(-2, 2, step=0.1)\n",
    "y = f(x)\n",
    "\n",
    "# Plot of y against x, currently disabled:\n",
    "# plt.xlabel(\"x\")\n",
    "# plt.ylabel(\"y\")\n",
    "# plt.plot(x,y)\n",
    "# plt.show()\n",
    "\n",
    "A = np.arange(1, 6)                    # shape (5,):   [1 2 3 4 5]\n",
    "B = np.arange(5, 0, -1).reshape(5, 1)  # shape (5, 1): column 5, 4, 3, 2, 1\n",
    "\n",
    "_add = A + B      # broadcasts (5,) with (5, 1) to a (5, 5) table of sums\n",
    "_dot = A.dot(B)   # (5,) dot (5, 1) gives shape (1,)\n",
    "# Despite the name, `*` is the element-wise (broadcast) product, not a\n",
    "# vector cross product.\n",
    "_cross = A * B\n",
    "\n",
    "# Print each result's shape followed by its values, in the order computed.\n",
    "for _result in (_add, _dot, _cross):\n",
    "    print(_result.shape,'\\n', _result)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
